import os
from collections import Counter

import pandas as pd


class TopicStatistical:
    """Count how often each topic occurs in the ``topic`` column of a CSV file."""

    def __init__(self, csv_path=None):
        """Create empty result containers and resolve the input CSV path.

        Args:
            csv_path: Optional path of the CSV file to read. When omitted,
                ``static/csv_collect/deep_articles.csv`` below the directory
                two levels above this file is used.
        """
        # Topics kept by the most recent statistical() run, in file order.
        self.topic_list = []
        # Mapping of topic -> occurrence count from the most recent run.
        self.topic_dict = {}
        # Directory two levels above this source file.
        self.my_BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        if csv_path is None:
            # Default input: the CSV file produced by the crawler.
            csv_path = os.path.join(self.my_BASE_DIR, 'static', 'csv_collect', 'deep_articles.csv')
        # csv_path: the CSV file that statistical() reads.
        self.csv_path = csv_path

    def statistical(self) -> dict:
        """Read the CSV file and count the occurrences of every valid topic.

        Returns:
            dict: Mapping of topic -> number of rows carrying that topic.
            The same dict is stored in ``self.topic_dict``.

        Raises:
            FileNotFoundError: If ``self.csv_path`` does not exist.
            KeyError: If the file has no ``topic`` column.
        """
        # Load the data; the first row is the header.
        df = pd.read_csv(self.csv_path, sep=",", header=0, encoding="utf-8")

        # Start from an empty list so that calling this method more than once
        # does not count the same rows twice.
        self.topic_list = []

        # Clean the data: keep only the usable topic values.
        # Iterating the column directly avoids label-based integer lookups.
        for topic in df["topic"]:
            self.extract_topic(topic)

        # Count how many times each topic occurs.
        self.topic_dict = dict(Counter(self.topic_list))

        return self.topic_dict

    def extract_topic(self, topic) -> None:
        """Append ``topic`` to ``self.topic_list`` unless it marks a missing value.

        Skipped values are NaN/None, the empty string and the literal
        ``"NOTFOUND"``.

        Args:
            topic: A single cell value taken from the ``topic`` column.
        """
        # pandas.read_csv turns empty cells into NaN rather than "", so a plain
        # string comparison alone would let them through and they would end up
        # being counted under NaN keys.
        if pd.isna(topic):
            return
        if topic != "" and topic != "NOTFOUND":
            self.topic_list.append(topic)


# Entry point when the file is executed directly as a script.
if __name__ == "__main__":
    # Top-level boundary: report any failure on stdout instead of a traceback.
    try:
        TopicStatistical().statistical()
    except Exception as err:
        print("错误:", err)
